#Importing packages
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
df =pd.read_csv('SUPPLY DATA_Merged.csv')
df.head()
| Product type | SKU | Price | Availability | Number of products sold | Revenue generated | Customer demographics | Stock levels | Lead times | Order quantities | ... | Inspection results | Defect rates | Transportation modes | Routes | Costs | Scheduled Delivery Date | Delivered to Client Date | Delivery Recorded Date | Product Group | Brand | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | haircare | SKU0 | 69.808006 | 55 | 802 | 8661.996792 | Non-binary | 58 | 7 | 96 | ... | Pending | 0.226410 | Road | Route B | 187.752075 | 2-Jun-06 | 2-Jun-06 | 2-Jun-06 | HRDT | Loreal |
| 1 | skincare | SKU1 | 14.843523 | 95 | 736 | 7460.900065 | Female | 53 | 30 | 37 | ... | Pending | 4.854068 | Road | Route B | 503.065579 | 14-Nov-06 | 14-Nov-06 | 14-Nov-06 | ARV | Cetaphil |
| 2 | haircare | SKU2 | 11.319683 | 34 | 8 | 9577.749626 | Unknown | 1 | 10 | 88 | ... | Pending | 4.580593 | Air | Route C | 141.920282 | 27-Aug-06 | 27-Aug-06 | 27-Aug-06 | HRDT | Tresemme |
| 3 | skincare | SKU3 | 61.163343 | 68 | 83 | 7766.836426 | Non-binary | 23 | 13 | 59 | ... | Fail | 4.746649 | Rail | Route A | 254.776159 | 1-Sep-06 | 1-Sep-06 | 1-Sep-06 | ARV | Nivea |
| 4 | skincare | SKU4 | 4.805496 | 26 | 871 | 2686.505152 | Non-binary | 5 | 3 | 56 | ... | Fail | 3.145580 | Air | Route A | 923.440632 | 11-Aug-06 | 11-Aug-06 | 11-Aug-06 | ARV | Clinique |
5 rows × 29 columns
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 100 entries, 0 to 99 Data columns (total 29 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Product type 100 non-null object 1 SKU 100 non-null object 2 Price 100 non-null float64 3 Availability 100 non-null int64 4 Number of products sold 100 non-null int64 5 Revenue generated 100 non-null float64 6 Customer demographics 100 non-null object 7 Stock levels 100 non-null int64 8 Lead times 100 non-null int64 9 Order quantities 100 non-null int64 10 Shipping times 100 non-null int64 11 Shipping carriers 100 non-null object 12 Shipping costs 100 non-null float64 13 Supplier name 100 non-null object 14 Location 100 non-null object 15 Lead time 100 non-null int64 16 Production volumes 100 non-null int64 17 Manufacturing lead time 100 non-null int64 18 Manufacturing costs 100 non-null float64 19 Inspection results 100 non-null object 20 Defect rates 100 non-null float64 21 Transportation modes 100 non-null object 22 Routes 100 non-null object 23 Costs 100 non-null float64 24 Scheduled Delivery Date 100 non-null object 25 Delivered to Client Date 100 non-null object 26 Delivery Recorded Date 100 non-null object 27 Product Group 100 non-null object 28 Brand 100 non-null object dtypes: float64(6), int64(9), object(14) memory usage: 22.8+ KB
print('No. of rows and columns in dataset',df.shape)
No. of rows and columns in dataset (100, 29)
#To check the statistics about the dataset
df.describe().style.background_gradient(cmap='autumn_r')
| Price | Availability | Number of products sold | Revenue generated | Stock levels | Lead times | Order quantities | Shipping times | Shipping costs | Lead time | Production volumes | Manufacturing lead time | Manufacturing costs | Defect rates | Costs | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 |
| mean | 49.462461 | 48.400000 | 460.990000 | 5776.048187 | 47.770000 | 15.960000 | 49.220000 | 5.750000 | 5.548149 | 17.080000 | 567.840000 | 14.770000 | 47.266693 | 2.277158 | 529.245782 |
| std | 31.168193 | 30.743317 | 303.780074 | 2732.841744 | 31.369372 | 8.785801 | 26.784429 | 2.724283 | 2.651376 | 8.846251 | 263.046861 | 8.912430 | 28.982841 | 1.461366 | 258.301696 |
| min | 1.699976 | 1.000000 | 8.000000 | 1061.618523 | 0.000000 | 1.000000 | 1.000000 | 1.000000 | 1.013487 | 1.000000 | 104.000000 | 1.000000 | 1.085069 | 0.018608 | 103.916248 |
| 25% | 19.597823 | 22.750000 | 184.250000 | 2812.847151 | 16.750000 | 8.000000 | 26.000000 | 3.750000 | 3.540248 | 10.000000 | 352.000000 | 7.000000 | 22.983299 | 1.009650 | 318.778455 |
| 50% | 51.239830 | 43.500000 | 392.500000 | 6006.352023 | 47.500000 | 17.000000 | 52.000000 | 6.000000 | 5.320534 | 18.000000 | 568.500000 | 14.000000 | 45.905622 | 2.141863 | 520.430444 |
| 75% | 77.198228 | 75.000000 | 704.250000 | 8253.976920 | 73.000000 | 24.000000 | 71.250000 | 8.000000 | 7.601695 | 25.000000 | 797.000000 | 23.000000 | 68.621026 | 3.563995 | 763.078231 |
| max | 99.171329 | 100.000000 | 996.000000 | 9866.465458 | 100.000000 | 30.000000 | 96.000000 | 10.000000 | 9.929816 | 30.000000 | 985.000000 | 30.000000 | 99.466109 | 4.939255 | 997.413450 |
#To find some correlation
plt.figure(figsize=(16,7))
sns.heatmap(df.corr(),annot=True,cmap='autumn_r')
plt.show()
#To check any null values in the dataset
df.isna().sum()/len(df)
Product type 0.0 SKU 0.0 Price 0.0 Availability 0.0 Number of products sold 0.0 Revenue generated 0.0 Customer demographics 0.0 Stock levels 0.0 Lead times 0.0 Order quantities 0.0 Shipping times 0.0 Shipping carriers 0.0 Shipping costs 0.0 Supplier name 0.0 Location 0.0 Lead time 0.0 Production volumes 0.0 Manufacturing lead time 0.0 Manufacturing costs 0.0 Inspection results 0.0 Defect rates 0.0 Transportation modes 0.0 Routes 0.0 Costs 0.0 Scheduled Delivery Date 0.0 Delivered to Client Date 0.0 Delivery Recorded Date 0.0 Product Group 0.0 Brand 0.0 dtype: float64
#Separating numerical and the categorical columns
categorical=[x for x in df.columns if df[x].dtypes=='object']
numerical=[x for x in df.columns if df[x].dtypes!='object']
print(categorical)
print(numerical)
['Product type', 'SKU', 'Customer demographics', 'Shipping carriers', 'Supplier name', 'Location', 'Inspection results', 'Transportation modes', 'Routes', 'Scheduled Delivery Date', 'Delivered to Client Date', 'Delivery Recorded Date', 'Product Group', 'Brand'] ['Price', 'Availability', 'Number of products sold', 'Revenue generated', 'Stock levels', 'Lead times', 'Order quantities', 'Shipping times', 'Shipping costs', 'Lead time', 'Production volumes', 'Manufacturing lead time', 'Manufacturing costs', 'Defect rates', 'Costs']
EDA
#Using count plots, visualizing the categorical columns
for x in df.select_dtypes(include='object'):
sns.countplot(data=df,x=df[x])
plt.xticks(rotation=85)
plt.show()
#Using Boxplot, visualizing the numerical columns
plt.figure(figsize=(14,6))
df.boxplot(grid=False, rot=50, fontsize=14)
<AxesSubplot:>
#Bar chart for each product type with Revenue generated
ax = df.groupby(['Product type'])[['Revenue generated']].sum()\
.plot(kind='bar', figsize=(15, 7), title="Revenue generated by Product type", fontsize=18, color=['green'])
ax.set_xlabel("Product Type", fontsize=14)
ax.set_ylabel("Revenue generated", fontsize=14)
# Add data labels
for p in ax.patches:
ax.annotate(format(p.get_height(), '.2f'), (p.get_x() + p.get_width() / 2., p.get_height()),
ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')
plt.show()
#Bar chart for each product type with Order quantities
ax = df.groupby(['Product type'])[['Order quantities']].sum()\
.plot(kind='bar', figsize=(15, 7), title="Demand graph by Product type", fontsize=18, color=['orange'])
ax.set_xlabel("Product Type", fontsize=14)
ax.set_ylabel("Order quantities", fontsize=14)
# Add data labels
for p in ax.patches:
ax.annotate(format(p.get_height(), '.2f'), (p.get_x() + p.get_width() / 2., p.get_height()),
ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')
plt.show()
#Bar chart for each product type with availability
ax = df.groupby(['Product type'])[['Availability']].sum()\
.plot(kind='bar', figsize=(15, 7), title="Supply graph by Product type", fontsize=18, color=['blue'])
ax.set_xlabel("Product Type", fontsize=14)
ax.set_ylabel("Availability", fontsize=14)
# Add data labels
for p in ax.patches:
ax.annotate(format(p.get_height(), '.2f'), (p.get_x() + p.get_width() / 2., p.get_height()),
ha = 'center', va = 'center', xytext = (0, 10), textcoords = 'offset points')
plt.show()
# Scatter plot for demand and supply
plt.scatter(df['Order quantities'], df['Availability'])
# Calculate the best-fit line
fit = np.polyfit(df['Order quantities'], df['Availability'], 1)
line = np.poly1d(fit)
# Plot the best-fit line
plt.plot(df['Order quantities'], line(df['Order quantities']), color='red')
# Set labels and title
plt.xlabel('Order quantities')
plt.ylabel('Availability')
plt.title('Demand vs Supply')
# Display the plot
plt.show()
Observations
#Which brand generated more revenue by product
df.groupby(['Brand','Product type'])[['Revenue generated']].sum()\
.sort_index()\
.sort_values(by='Brand',ascending=False)\
.unstack()\
.style.background_gradient(cmap='rainbow')
| Revenue generated | |||
|---|---|---|---|
| Product type | cosmetics | haircare | skincare |
| Brand | |||
| Cetaphil | nan | nan | 89366.954339 |
| Clinique | nan | nan | 54138.670559 |
| Dove | nan | 13097.824623 | nan |
| Loreal | nan | 86006.445676 | nan |
| Mac | 41408.875079 | nan | nan |
| Maybelline | 63995.436594 | nan | nan |
| Nivea | nan | nan | 98122.537235 |
| Revlon | 56116.954328 | nan | nan |
| Tresemme | nan | 75351.120307 | nan |
#Categorizing each product by total price, available quantities
df.groupby(['Product type']).sum()\
.sort_index()\
.style.background_gradient(cmap='autumn_r')
| Price | Availability | Number of products sold | Revenue generated | Stock levels | Lead times | Order quantities | Shipping times | Shipping costs | Lead time | Production volumes | Manufacturing lead time | Manufacturing costs | Defect rates | Costs | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Product type | |||||||||||||||
| cosmetics | 1491.387498 | 1332 | 11757 | 161521.266001 | 1525 | 400 | 1343 | 171 | 157.563663 | 352 | 12461 | 346 | 1119.371253 | 49.901461 | 13366.397283 |
| haircare | 1564.485482 | 1471 | 13611 | 174455.390606 | 1644 | 528 | 1480 | 191 | 200.863735 | 636 | 19957 | 580 | 1647.571776 | 84.427107 | 17328.862865 |
| skincare | 1890.373155 | 2037 | 20731 | 241628.162133 | 1608 | 668 | 2099 | 213 | 196.387510 | 720 | 24366 | 551 | 1959.726295 | 93.387231 | 22229.318068 |
#Categorizing each product by total price, available quantities
df.groupby(['Brand']).sum()\
.sort_index()\
.style.background_gradient(cmap='autumn_r')
| Price | Availability | Number of products sold | Revenue generated | Stock levels | Lead times | Order quantities | Shipping times | Shipping costs | Lead time | Production volumes | Manufacturing lead time | Manufacturing costs | Defect rates | Costs | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Brand | |||||||||||||||
| Cetaphil | 679.180064 | 829 | 7064 | 89366.954339 | 531 | 259 | 814 | 75 | 74.715577 | 276 | 7835 | 214 | 688.558450 | 36.760952 | 8001.578017 |
| Clinique | 430.836306 | 489 | 5616 | 54138.670559 | 456 | 175 | 475 | 61 | 46.296781 | 190 | 6605 | 134 | 445.792529 | 20.603057 | 5114.036373 |
| Dove | 159.997358 | 206 | 1722 | 13097.824623 | 295 | 110 | 288 | 26 | 23.300682 | 61 | 1233 | 67 | 159.804923 | 11.050097 | 2269.530461 |
| Loreal | 814.539108 | 724 | 7420 | 86006.445676 | 753 | 183 | 730 | 85 | 88.893551 | 274 | 9527 | 275 | 828.304701 | 36.014933 | 7518.460083 |
| Mac | 338.603986 | 414 | 3246 | 41408.875079 | 320 | 140 | 473 | 53 | 42.887890 | 102 | 2763 | 80 | 354.598806 | 18.848161 | 4446.638071 |
| Maybelline | 656.803226 | 552 | 5559 | 63995.436594 | 737 | 112 | 477 | 74 | 77.802552 | 139 | 5856 | 142 | 350.769597 | 17.444996 | 5490.698963 |
| Nivea | 780.356785 | 719 | 8051 | 98122.537235 | 621 | 234 | 810 | 77 | 75.375152 | 254 | 9926 | 203 | 825.375316 | 36.023222 | 9113.703678 |
| Revlon | 495.980285 | 366 | 2952 | 56116.954328 | 468 | 148 | 393 | 44 | 36.873220 | 111 | 3842 | 124 | 414.002850 | 13.608304 | 3429.060249 |
| Tresemme | 589.949016 | 541 | 4469 | 75351.120307 | 596 | 235 | 462 | 80 | 88.669501 | 301 | 9197 | 238 | 659.462152 | 37.362076 | 7540.872320 |
# Grouping the data by 'Product type' and calculating the sum of 'Defect rates'
grouped_data = df.groupby(['Product type'])['Defect rates'].sum()
# Sorting the data in descending order
sorted_data = grouped_data.sort_values(ascending=False)
# Creating a pie chart
labels = ['skincare', 'haircare', 'cosmetics']
sorted_data.plot(kind='pie', labels=labels, autopct='%1.1f%%', title = "Defect rates by Product type", fontsize= 16, figsize=(15,7))
# Displaying the pie chart
plt.show()
import pandas as pd
import plotly.express as px
# Creating the pivot table
pivot_table = pd.pivot_table(df, values='Defect rates', index=['Transportation modes'], aggfunc='mean')
# Creating the pie chart
transportation_chart = px.pie(values=pivot_table["Defect rates"], names=pivot_table.index,
title='Defect Rates by Transportation Mode', hole=0.5,
color_discrete_sequence=px.colors.qualitative.Pastel)
# Displaying the pie chart
transportation_chart.show()
Calculations
mean_customer_demand = df['Order quantities'].mean()
mean_available_production_time = df['Manufacturing lead time'].mean()
print("Customer_demand:", mean_customer_demand)
print("Available_production_time:", mean_available_production_time)
Customer_demand: 49.22 Available_production_time: 14.77
Calculation:1 TAKT TIME
# Calculating the takt time
Available_production_time = 14.77
Customer_demand = 49.22
takt_time = Available_production_time / Customer_demand
print("Takt Time:", takt_time, "minutes per unit")
#Calculating minimum workstations
import math
def calculate_workstations(takt_time, processing_time):
return math.ceil(takt_time / processing_time)
# Example usage
takt_time = 3600 # Takt time in seconds
processing_time = 300 # Processing time per unit in seconds
workstations = calculate_workstations(takt_time, processing_time)
print("Minimum number of workstations required:", workstations)
def calculate_idle_time(total_production_time, actual_working_time):
idle_time = total_production_time - actual_working_time
percent_idle_time = (idle_time / total_production_time) * 100
efficiency_delay = (idle_time / actual_working_time) * 100
return idle_time, percent_idle_time, efficiency_delay
# Example usage
total_production_time = 480 # Total production time in minutes
actual_working_time = 380 # Actual working time in minutes
idle_time, percent_idle_time, efficiency_delay = calculate_idle_time(total_production_time, actual_working_time)
print("Idle Time:", idle_time, "minutes")
print("Percent Idle Time:", percent_idle_time, "%")
print("Efficiency Delay:", efficiency_delay, "%")
Takt Time: 0.3000812677773263 minutes per unit Minimum number of workstations required: 12 Idle Time: 100 minutes Percent Idle Time: 20.833333333333336 % Efficiency Delay: 26.31578947368421 %
Calculation:2 QUEUE THEORY
#Calculating Queue Theory
mean_arrival_rate = df['Shipping times'].mean()
mean_service_rate = df['Lead times'].mean()
print("Mean_product_arrival_time:", mean_arrival_rate)
print("Mean_product_service_rate:", mean_service_rate)
Average_number_of_customers_in_system = mean_arrival_rate/ (mean_service_rate-mean_arrival_rate)
print("L(Average_number_of_customers_in_system):", Average_number_of_customers_in_system)
Average_time_a_customer_spends_in_system = 1/(mean_service_rate-mean_arrival_rate)
print("W(Average_time_a_customer_spends_in_system):", Average_time_a_customer_spends_in_system)
Average_number_of_customers_in_queue = (mean_arrival_rate*mean_arrival_rate)/((mean_service_rate)*(mean_service_rate-mean_arrival_rate))
print("Lq(Average_number_of_customers_in_queue):", Average_number_of_customers_in_queue)
Average_time_a_customer_spends_waiting = mean_arrival_rate/ (mean_service_rate* (mean_service_rate- mean_arrival_rate))
print("Wq(Average_time_a_customer_spends_waiting):", Average_time_a_customer_spends_waiting)
Utilization_Factor = mean_arrival_rate/ mean_service_rate
print("P(Utilization_Factor):", Utilization_Factor)
Percent_Idle_Time = 1- mean_arrival_rate/mean_service_rate
print("P(Percent_Idle_Time):(No customers in system):", Percent_Idle_Time)
for num_products in range(5): # Iterate from 0 to 4
probability = ((mean_arrival_rate / mean_service_rate) ** num_products) * Percent_Idle_Time
print(f"Probability of {num_products} products in the system:", probability)
Mean_product_arrival_time: 5.75 Mean_product_service_rate: 15.96 L(Average_number_of_customers_in_system): 0.563173359451518 W(Average_time_a_customer_spends_in_system): 0.0979431929480901 Lq(Average_number_of_customers_in_queue): 0.20289767022846045 Wq(Average_time_a_customer_spends_waiting): 0.03528655134408008 P(Utilization_Factor): 0.36027568922305764 P(Percent_Idle_Time):(No customers in system): 0.6397243107769424 Probability of 0 products in the system: 0.6397243107769424 Probability of 1 products in the system: 0.23047711697790843 Probability of 2 products in the system: 0.08303530216935924 Probability of 3 products in the system: 0.029915600718910754 Probability of 4 products in the system: 0.01077786366752737
Calculation:3 MANUFACURING OF LOREAL MAP PROCESS CALCULATION
df1 =pd.read_csv('supplychain_manufacturing.csv')
df1.head()
| Manufacturing Process | Time (in minutes) | |
|---|---|---|
| 0 | Formula Ingredients Extraction | 240.0 |
| 1 | Formula Ingredients Transportation | 480.0 |
| 2 | Product Manufacturing | 260.0 |
| 3 | Storage at Distribution Center | 600.0 |
| 4 | Storage at Retailer | 800.0 |
df1.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 11 entries, 0 to 10 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Manufacturing Process 11 non-null object 1 Time (in minutes) 8 non-null float64 dtypes: float64(1), object(1) memory usage: 304.0+ bytes
#To check any null values in the dataset
df1.isna().sum()
Manufacturing Process 0 Time (in minutes) 3 dtype: int64
# Define the times for each process
times = [
240, # Formula Ingredients Extraction
480, # Formula Ingredients Transportation
260, # Product Manufacturing
600, # Storage at Distribution Center
800, # Storage at Retailer
0, # Product Use (unknown time, set as 0 for demonstration)
0, # Formula End Of Life (unknown time, set as 0 for demonstration)
2000, # Packaging Recycling
0, # Packaging End Of Life (unknown time, set as 0 for demonstration)
550, # Packaging Production
300 # Packaging Materials Extraction
]
# Calculate total time for the whole process
total_time = 0
for time in times:
total_time += time
print("Total Time:", total_time, "minutes")
Total Time: 5230 minutes
Calculation:4 MEASURING BUSINESS PROCESSES- PART-1
#Calculate Multifactor Productivity Ratio
mean_revenue_generated = df['Revenue generated'].mean()
mean_manufacturing_costs = df['Manufacturing costs'].mean()
mean_costs = df['Costs'].mean()
print("Revenue generated:", mean_revenue_generated)
print("Manufacturing costs:", mean_manufacturing_costs)
print("Costs:", mean_costs)
Multifactor_Productivity_Ratio = mean_revenue_generated/(mean_manufacturing_costs+ mean_costs)
print("Multifactor_Productivity_Ratio is:", Multifactor_Productivity_Ratio)
Revenue generated: 5776.0481874 Manufacturing costs: 47.26669324143001 Costs: 529.2457821540002 Multifactor_Productivity_Ratio is: 10.01894743637283
Calculation:5 MEASURING BUSINESS PROCESSES- PART-2
#Calculate Efficiency Ratio
mean_actual_outputs = df['Number of products sold'].mean()
mean_standard_outputs = df['Production volumes'].mean()
print("Number of products sold:", mean_actual_outputs)
print("Production volumes:", mean_standard_outputs)
Efficiency_Ratio = mean_actual_outputs/mean_standard_outputs
print("Efficiency_Ratio is:", Efficiency_Ratio)
Efficiency_Ratio_Percentage = (mean_actual_outputs/mean_standard_outputs) * 100
print("Efficiency_Ratio Percentage is:", Efficiency_Ratio_Percentage)
Number of products sold: 460.99 Production volumes: 567.84 Efficiency_Ratio is: 0.8118307974077205 Efficiency_Ratio Percentage is: 81.18307974077204
Calculation:6 VALUE INDEX : SOLVED IN EXCEL (SEE ATTACHED)